{
 "cells": [
  {
   "cell_type": "code",
   "execution_count": 31,
   "metadata": {},
   "outputs": [],
   "source": [
    "# Spring 2018 - Adam Ross Nelson - Modified/executed in Jupyter.\n",
    "# Fall   2017 - Adam Ross Nelson - Modified/executed in python.\n",
    "# Fall   2015 - Adam Ross Nelson - Originally exectued in Stata.\n",
    "#             - See: https:github.com/adamrossnelson/crossreg/blob/master/corrhandStata.do\n",
    "# Maintained at: https:github.com/adamrossnelson/crossreg\n",
    "\n",
    "# Use notebook calculates correlation without using -df.corr-\n",
    "# Can be used to assist when learning how to calculate by by hand. \n",
    "# Or, useful when double checking hand work."
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 32,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/html": [
       "<div>\n",
       "<style scoped>\n",
       "    .dataframe tbody tr th:only-of-type {\n",
       "        vertical-align: middle;\n",
       "    }\n",
       "\n",
       "    .dataframe tbody tr th {\n",
       "        vertical-align: top;\n",
       "    }\n",
       "\n",
       "    .dataframe thead th {\n",
       "        text-align: right;\n",
       "    }\n",
       "</style>\n",
       "<table border=\"1\" class=\"dataframe\">\n",
       "  <thead>\n",
       "    <tr style=\"text-align: right;\">\n",
       "      <th></th>\n",
       "      <th>x</th>\n",
       "      <th>y</th>\n",
       "    </tr>\n",
       "  </thead>\n",
       "  <tbody>\n",
       "    <tr>\n",
       "      <th>0</th>\n",
       "      <td>2</td>\n",
       "      <td>3</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>1</th>\n",
       "      <td>10</td>\n",
       "      <td>7</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>2</th>\n",
       "      <td>9</td>\n",
       "      <td>6</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>3</th>\n",
       "      <td>5</td>\n",
       "      <td>6</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>4</th>\n",
       "      <td>8</td>\n",
       "      <td>6</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>5</th>\n",
       "      <td>2</td>\n",
       "      <td>3</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>6</th>\n",
       "      <td>4</td>\n",
       "      <td>3</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>7</th>\n",
       "      <td>8</td>\n",
       "      <td>6</td>\n",
       "    </tr>\n",
       "  </tbody>\n",
       "</table>\n",
       "</div>"
      ],
      "text/plain": [
       "    x  y\n",
       "0   2  3\n",
       "1  10  7\n",
       "2   9  6\n",
       "3   5  6\n",
       "4   8  6\n",
       "5   2  3\n",
       "6   4  3\n",
       "7   8  6"
      ]
     },
     "execution_count": 32,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "import pandas as pd\n",
    "import math as math\n",
    "import requests\n",
    "\n",
    "thedata = {'y':[3,7,6,6,6,3,3,6],\n",
    "          'x':[2,10,9,5,8,2,4,8]}\n",
    "\n",
    "df = pd.DataFrame(thedata)\n",
    "df"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 33,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/html": [
       "<div>\n",
       "<style scoped>\n",
       "    .dataframe tbody tr th:only-of-type {\n",
       "        vertical-align: middle;\n",
       "    }\n",
       "\n",
       "    .dataframe tbody tr th {\n",
       "        vertical-align: top;\n",
       "    }\n",
       "\n",
       "    .dataframe thead th {\n",
       "        text-align: right;\n",
       "    }\n",
       "</style>\n",
       "<table border=\"1\" class=\"dataframe\">\n",
       "  <thead>\n",
       "    <tr style=\"text-align: right;\">\n",
       "      <th></th>\n",
       "      <th>x</th>\n",
       "      <th>y</th>\n",
       "      <th>y_minus_ybar</th>\n",
       "      <th>x_minus_xbar</th>\n",
       "    </tr>\n",
       "  </thead>\n",
       "  <tbody>\n",
       "    <tr>\n",
       "      <th>0</th>\n",
       "      <td>2</td>\n",
       "      <td>3</td>\n",
       "      <td>-2.0</td>\n",
       "      <td>-4.0</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>1</th>\n",
       "      <td>10</td>\n",
       "      <td>7</td>\n",
       "      <td>2.0</td>\n",
       "      <td>4.0</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>2</th>\n",
       "      <td>9</td>\n",
       "      <td>6</td>\n",
       "      <td>1.0</td>\n",
       "      <td>3.0</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>3</th>\n",
       "      <td>5</td>\n",
       "      <td>6</td>\n",
       "      <td>1.0</td>\n",
       "      <td>-1.0</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>4</th>\n",
       "      <td>8</td>\n",
       "      <td>6</td>\n",
       "      <td>1.0</td>\n",
       "      <td>2.0</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>5</th>\n",
       "      <td>2</td>\n",
       "      <td>3</td>\n",
       "      <td>-2.0</td>\n",
       "      <td>-4.0</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>6</th>\n",
       "      <td>4</td>\n",
       "      <td>3</td>\n",
       "      <td>-2.0</td>\n",
       "      <td>-2.0</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>7</th>\n",
       "      <td>8</td>\n",
       "      <td>6</td>\n",
       "      <td>1.0</td>\n",
       "      <td>2.0</td>\n",
       "    </tr>\n",
       "  </tbody>\n",
       "</table>\n",
       "</div>"
      ],
      "text/plain": [
       "    x  y  y_minus_ybar  x_minus_xbar\n",
       "0   2  3          -2.0          -4.0\n",
       "1  10  7           2.0           4.0\n",
       "2   9  6           1.0           3.0\n",
       "3   5  6           1.0          -1.0\n",
       "4   8  6           1.0           2.0\n",
       "5   2  3          -2.0          -4.0\n",
       "6   4  3          -2.0          -2.0\n",
       "7   8  6           1.0           2.0"
      ]
     },
     "execution_count": 33,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "# Individually calculate Y-bar (or Ysubi minus Ybar) | The error terms\n",
    "df = df.assign(y_minus_ybar=df['y'] - df['y'].mean())\n",
    "\n",
    "# Individually calculate X-bar (or Xsubi minus Xbar) | The error terms\n",
    "df = df.assign(x_minus_xbar=df['x'] - df['x'].mean())\n",
    "\n",
    "df"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 42,
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "Covariance of x and y is  34.0000\n"
     ]
    }
   ],
   "source": [
    "# Generate the covariances; then use to calculate numerator\n",
    "# This term is stated as sum of the error terms (from above) multiplied together\n",
    "df = df.assign(cov_of_xy=df['y_minus_ybar'] * df['x_minus_xbar'])\n",
    "cov_of_xy_sum = df['cov_of_xy'].sum()\n",
    "\n",
    "print(\"Covariance of x and y is %8.4f\" %cov_of_xy_sum)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 43,
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "Sum of y squared error is  20.0000\n",
      "Sum of x squared error is  70.0000\n"
     ]
    }
   ],
   "source": [
    "# Generate squared errors\n",
    "# The sum of squared error of y is stored in variable -sum_y_sq_err-\n",
    "df = df.assign(y_minus_ybar_sq=df['y_minus_ybar'] * df['y_minus_ybar'])\n",
    "sum_y_sq_err = df['y_minus_ybar_sq'].sum()\n",
    "\n",
    "# The sum of squared error of x is stored in variable -sum_x_sq_err-\n",
    "df = df.assign(x_minus_xbar_sq=df['x_minus_xbar'] * df['x_minus_xbar'])\n",
    "sum_x_sq_err = df['x_minus_xbar_sq'].sum()\n",
    "\n",
    "print(\"Sum of y squared error is %8.4f\" %sum_y_sq_err)\n",
    "print(\"Sum of x squared error is %8.4f\" %sum_x_sq_err)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 44,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/html": [
       "<div>\n",
       "<style scoped>\n",
       "    .dataframe tbody tr th:only-of-type {\n",
       "        vertical-align: middle;\n",
       "    }\n",
       "\n",
       "    .dataframe tbody tr th {\n",
       "        vertical-align: top;\n",
       "    }\n",
       "\n",
       "    .dataframe thead th {\n",
       "        text-align: right;\n",
       "    }\n",
       "</style>\n",
       "<table border=\"1\" class=\"dataframe\">\n",
       "  <thead>\n",
       "    <tr style=\"text-align: right;\">\n",
       "      <th></th>\n",
       "      <th>x</th>\n",
       "      <th>y</th>\n",
       "      <th>y_minus_ybar</th>\n",
       "      <th>x_minus_xbar</th>\n",
       "      <th>cov_of_xy</th>\n",
       "      <th>y_minus_ybar_sq</th>\n",
       "      <th>x_minus_xbar_sq</th>\n",
       "    </tr>\n",
       "  </thead>\n",
       "  <tbody>\n",
       "    <tr>\n",
       "      <th>0</th>\n",
       "      <td>2</td>\n",
       "      <td>3</td>\n",
       "      <td>-2.0</td>\n",
       "      <td>-4.0</td>\n",
       "      <td>8.0</td>\n",
       "      <td>4.0</td>\n",
       "      <td>16.0</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>1</th>\n",
       "      <td>10</td>\n",
       "      <td>7</td>\n",
       "      <td>2.0</td>\n",
       "      <td>4.0</td>\n",
       "      <td>8.0</td>\n",
       "      <td>4.0</td>\n",
       "      <td>16.0</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>2</th>\n",
       "      <td>9</td>\n",
       "      <td>6</td>\n",
       "      <td>1.0</td>\n",
       "      <td>3.0</td>\n",
       "      <td>3.0</td>\n",
       "      <td>1.0</td>\n",
       "      <td>9.0</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>3</th>\n",
       "      <td>5</td>\n",
       "      <td>6</td>\n",
       "      <td>1.0</td>\n",
       "      <td>-1.0</td>\n",
       "      <td>-1.0</td>\n",
       "      <td>1.0</td>\n",
       "      <td>1.0</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>4</th>\n",
       "      <td>8</td>\n",
       "      <td>6</td>\n",
       "      <td>1.0</td>\n",
       "      <td>2.0</td>\n",
       "      <td>2.0</td>\n",
       "      <td>1.0</td>\n",
       "      <td>4.0</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>5</th>\n",
       "      <td>2</td>\n",
       "      <td>3</td>\n",
       "      <td>-2.0</td>\n",
       "      <td>-4.0</td>\n",
       "      <td>8.0</td>\n",
       "      <td>4.0</td>\n",
       "      <td>16.0</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>6</th>\n",
       "      <td>4</td>\n",
       "      <td>3</td>\n",
       "      <td>-2.0</td>\n",
       "      <td>-2.0</td>\n",
       "      <td>4.0</td>\n",
       "      <td>4.0</td>\n",
       "      <td>4.0</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>7</th>\n",
       "      <td>8</td>\n",
       "      <td>6</td>\n",
       "      <td>1.0</td>\n",
       "      <td>2.0</td>\n",
       "      <td>2.0</td>\n",
       "      <td>1.0</td>\n",
       "      <td>4.0</td>\n",
       "    </tr>\n",
       "  </tbody>\n",
       "</table>\n",
       "</div>"
      ],
      "text/plain": [
       "    x  y  y_minus_ybar  x_minus_xbar  cov_of_xy  y_minus_ybar_sq  \\\n",
       "0   2  3          -2.0          -4.0        8.0              4.0   \n",
       "1  10  7           2.0           4.0        8.0              4.0   \n",
       "2   9  6           1.0           3.0        3.0              1.0   \n",
       "3   5  6           1.0          -1.0       -1.0              1.0   \n",
       "4   8  6           1.0           2.0        2.0              1.0   \n",
       "5   2  3          -2.0          -4.0        8.0              4.0   \n",
       "6   4  3          -2.0          -2.0        4.0              4.0   \n",
       "7   8  6           1.0           2.0        2.0              1.0   \n",
       "\n",
       "   x_minus_xbar_sq  \n",
       "0             16.0  \n",
       "1             16.0  \n",
       "2              9.0  \n",
       "3              1.0  \n",
       "4              4.0  \n",
       "5             16.0  \n",
       "6              4.0  \n",
       "7              4.0  "
      ]
     },
     "execution_count": 44,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "# Display results\n",
    "df"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 45,
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "The data correlation hand calculated results : 0.9086882225022429\n",
      "\n"
     ]
    }
   ],
   "source": [
    "print('The correlation ''hand calculated'' results : ', end='')\n",
    "print((cov_of_xy_sum)/(math.sqrt(sum_y_sq_err * sum_x_sq_err)), end=('\\n' * 2))"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 47,
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "0.9086882225022429\n",
      "\n"
     ]
    }
   ],
   "source": [
    "# Simplified, less verbose option not dependant on vars above\n",
    "print((df['cov_of_xy'].sum())/ \\\n",
    "math.sqrt((df['y_minus_ybar_sq'].sum()) * (df['x_minus_xbar_sq'].sum())), end=('\\n' * 2))"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 49,
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "The \"python calculated\" correlation results : 0.9086882225022428\n",
      "\n"
     ]
    }
   ],
   "source": [
    "# print using available corr method\n",
    "print('The \"python calculated\" correlation results : ' , end='')\n",
    "print(df['y'].corr(df['x']), end=('\\n' * 2))"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 50,
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "Provide formula notes for reference:\n",
      " \n",
      "                        SUM(X - Xbar)(Y - Ybar)\n",
      "  Correlation =  ----------------------------------\n",
      "                  SQRT(SUM(X-Xbar)^2 SUM(Y-Ybar)^2)\n",
      "\n",
      "                             total covxy\n",
      "  Correlation =  ---------------------------------------\n",
      "                  SQRT(total xmxbarsq * total ymybarsq)\n",
      "\n",
      "                                 cov\n",
      "  Correlation =  ---------------------------------------\n",
      "                          SQRT( xmy * ymy )\n",
      "\n",
      "                               Cov(X,Y)\n",
      "  Correlation =  ---------------------------------------\n",
      "                        SQRT(Var(X) * Var(Y))\n",
      "\n",
      "                               Cov(X,Y)\n",
      "  Correlation =  ---------------------------------------\n",
      "                        SQRT(Var(X) * Var(Y))\n",
      "\n"
     ]
    }
   ],
   "source": [
    "# Provide formula notes for reference:\n",
    "formulas = requests.get('https://raw.githubusercontent.com/adamrossnelson/crossreg/master/formulas.txt')\n",
    "print('Provide formula notes for reference:')\n",
    "print(formulas.text)"
   ]
  }
 ],
 "metadata": {
  "kernelspec": {
   "display_name": "Python 3",
   "language": "python",
   "name": "python3"
  },
  "language_info": {
   "codemirror_mode": {
    "name": "ipython",
    "version": 3
   },
   "file_extension": ".py",
   "mimetype": "text/x-python",
   "name": "python",
   "nbconvert_exporter": "python",
   "pygments_lexer": "ipython3",
   "version": "3.6.4"
  }
 },
 "nbformat": 4,
 "nbformat_minor": 2
}
